# Computations
import numpy as np
import pandas as pd
# scipy
from scipy.stats import norm
# preprocessing
from sklearn import preprocessing
import re
# Visualisation libraries
## Text
from colorama import Fore, Back, Style
from IPython.display import Image, display, Markdown, Latex
## seaborn
import seaborn as sns
sns.set_context("paper", rc={"font.size":12,"axes.titlesize":14,"axes.labelsize":12})
sns.set_style("white")
## matplotlib
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse, Polygon
import matplotlib.gridspec as gridspec
import matplotlib.colors
plt.style.use('seaborn-whitegrid')
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12
plt.rcParams['text.color'] = 'k'
%matplotlib inline
## plotly
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
import plotly.offline as py
from plotly.subplots import make_subplots
import plotly.express as px
# Graphics in retina format
%config InlineBackend.figure_format = 'retina'
## Progress Bar
import progressbar
## missingno
import missingno as msno
import warnings
warnings.filterwarnings("ignore")
In this study, we analyze an HR dataset available from kaggle.com.
The data is fictional and was created by IBM data scientists.
Categorical Parameters:
| | 1 | 2 | 3 | 4 | 5 |
|---|---|---|---|---|---|
| Education | Below College | College | Bachelor | Master | Doctor |
| Environment Satisfaction | Low | Medium | High | Very High | |
| Job Involvement | Low | Medium | High | Very High | |
| Job Satisfaction | Low | Medium | High | Very High | |
| Performance Rating | Low | Good | Excellent | Outstanding | |
| Relationship Satisfaction | Low | Medium | High | Very High | |
| Work Life Balance | Bad | Good | Better | Best | |
These numeric codes can be mapped to their labels with the following dictionary:
# Lookup tables that translate the integer codes of the ordinal survey columns
# into their text labels. Codes start at 1 and follow the order of the labels.
Categorical_Dict = {
    column: dict(enumerate(labels, start=1))
    for column, labels in (
        ('Education', ('Below College', 'College', 'Bachelor', 'Master', 'Doctor')),
        ('Environment Satisfaction', ('Low', 'Medium', 'High', 'Very High')),
        ('Job Involvement', ('Low', 'Medium', 'High', 'Very High')),
        ('Job Satisfaction', ('Low', 'Medium', 'High', 'Very High')),
        ('Performance Rating', ('Low', 'Good', 'Excellent', 'Outstanding')),
        ('Relationship Satisfaction', ('Low', 'Medium', 'High', 'Very High')),
        ('Work Life Balance', ('Bad', 'Good', 'Better', 'Best')),
    )
}
# Load the attrition workbook (path is relative to the notebook's working directory).
Data = pd.read_excel('Data/WA_Fn-UseC_-HR-Employee-Attrition.xlsx')
# Make the CamelCase headers readable: insert a space before every capital that
# follows a word character, then expand the abbreviations 'Curr' and 'Num' and
# separate the '18' suffix from the word in front of it.
Data.columns = [
    re.sub(r"(\w)([A-Z])", r"\1 \2", name)
      .replace(' Curr ', ' Current ')
      .replace('18', ' 18')
      .replace('Num ', 'Number Of ')
    for name in Data.columns
]
# Display names for the two attrition outcomes (typo 'Remanined' corrected).
Labels = ['Remained', 'Churned']
# Preview the first rows with the index column hidden.
Data.head().style.hide_index()
First, let's take a look at the dataset.
def Data_info(Inp, Only_NaN = False):
    """Summarise the data type and missing values of every column of *Inp*.

    Parameters
    ----------
    Inp : pandas.DataFrame
        Frame to inspect.
    Only_NaN : bool, default False
        When True, keep only the columns that contain at least one missing value.

    Returns
    -------
    pandas.DataFrame
        One row per column of *Inp* with the columns 'Data Type',
        'Number of NaN Values' and 'Percentage' (missing values as a
        percentage of the number of rows, rounded to two decimals).
    """
    type_table = Inp.dtypes.to_frame(name='Data Type').sort_values(by=['Data Type'])
    missing_table = Inp.isnull().sum().to_frame(name='Number of NaN Values')
    summary = type_table.join(missing_table, how='outer')
    summary['Percentage'] = np.round(100*(summary['Number of NaN Values']/Inp.shape[0]), 2)
    if not Only_NaN:
        return summary
    return summary.loc[summary['Number of NaN Values'] > 0]
# Show the summary transposed (one column per variable), keeping only its first
# two rows: the data type and the number of missing values.
display(Data_info(Data).T.iloc[:2])
# Bar chart of the non-missing entries per column.
_ = msno.bar(
    Data,
    figsize=(16, 3),
    fontsize=14,
    log=False,
    color="#34495e",
)
Furthermore, the columns can be grouped by their data type:
def dtypes_group(Inp):
    """Group the column names of *Inp* by their data type.

    Parameters
    ----------
    Inp : pandas.DataFrame
        Frame whose columns are grouped.

    Returns
    -------
    pandas.DataFrame
        Indexed by the distinct dtypes of *Inp* (in sorted dtype order), with a
        single column 'Columns' holding the list of column names of that dtype.
    """
    Temp = Inp.dtypes.to_frame(name='Data Type').sort_values(by=['Data Type'])
    kinds = Temp['Data Type'].unique()
    names_per_kind = [Temp.loc[Temp['Data Type'] == kind].index.tolist() for kind in kinds]
    # Build the object column in one step instead of writing nested lists into
    # an all-NaN frame row by row; that write depends on how pandas broadcasts
    # list values in .loc assignments.
    return pd.Series(names_per_kind, index=kinds, dtype=object).to_frame(name='Columns')
# Table of column names grouped by data type.
dtypes = dtypes_group(Data)
display(dtypes)
# One histogram per column that DataFrame.hist can plot.
hist_style = dict(bins=30, grid=False, figsize=(18, 18), color='#34495e',
                  edgecolor='k', zorder=2, rwidth=0.8)
_ = Data.hist(**hist_style)
# Fill colours and outline colour used by the charts below, one pair per theme.
# Attrition
Att_Colors, Att_LC = ['LightSalmon', 'LightBlue'], 'Black'
# Gender
MF_Colors, MF_LC = ['HotPink', 'RoyalBlue'], 'Navy'
# Education (one colour per education level)
Ed_Colors, Ed_LC = ['LightCoral', 'Khaki', 'GreenYellow', 'LimeGreen', 'ForestGreen'], 'Black'
# Histogram of Age split by Attrition, with a box plot as marginal.
# The copy-pasted 'Credit Distribution Histogram' title was dropped: it was
# overwritten by the real title before the figure was shown.
fig = px.histogram(Data, x='Age', color='Attrition', marginal='box',
                   color_discrete_sequence=Att_Colors, hover_data=Data.columns)
fig.update_traces(marker_line_color=Att_LC, marker_line_width=0.5, opacity=1)
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
# Fixed range for the primary (count) axis.
fig['layout']['yaxis'].update(range=[0, 80])
fig.update_layout(plot_bgcolor='white',
                  yaxis_title='Frequency',
                  title={'text': 'Age Distribution by Attrition',
                         'x': 0.46, 'y': 0.95,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()
# Share of each Gender x Age band x Attrition combination, as a percentage of
# all counted rows.
bins = pd.IntervalIndex.from_tuples([(0, 25), (25, 40), (40, 45), (45, 60)])
# Work on a copy: assigning the binned Age onto a column selection of Data is a
# chained assignment (SettingWithCopyWarning, hidden here by the warning filter).
Temp = Data[['Gender', 'Age', 'Attrition']].copy()
Temp['Age'] = pd.cut(Temp['Age'], bins)
Temp = (Temp.groupby(['Gender', 'Age', 'Attrition'])['Attrition']
            .agg(['count'])
            .rename(columns={'count': 'Count'}))
Temp['Percentage'] = np.round(100 * Temp['Count'] / Temp['Count'].sum(), 2)
Temp = Temp.reset_index().sort_values(by=['Age'])
Temp['Age'] = Temp['Age'].astype(str)  # interval labels as text for the axis

# Two stacked panels (Attrition No / Yes) sharing the percentage axis.
fig = make_subplots(rows=2, cols=1, vertical_spacing=0.08, shared_xaxes=True,
                    subplot_titles=('Attrition: No', 'Attrition: Yes'))
bar_style = dict(y='Age', x='Percentage', orientation='h', color='Gender',
                 text='Percentage', hover_data=Temp.columns,
                 color_discrete_sequence=MF_Colors)
fig1 = px.bar(Temp.loc[Temp.Attrition == 'No'], **bar_style)   # top panel
fig2 = px.bar(Temp.loc[Temp.Attrition == 'Yes'], **bar_style)  # bottom panel
for row, panel in enumerate((fig1, fig2), start=1):
    # Move the first two traces of the panel (split by color='Gender') into
    # the shared figure.
    fig.add_trace(panel.data[0], row=row, col=1)
    fig.add_trace(panel.data[1], row=row, col=1)
    fig.update_traces(marker_line_color=MF_LC, marker_line_width=1, opacity=1, row=row, col=1)
# The bottom panel repeats the same colour groups, so hide its legend entries.
fig.update_traces(showlegend=False, row=2, col=1)

fig.update_layout(height=600, plot_bgcolor='white', legend_orientation='h',
                  title={'text': 'Age Distribution by Gender and Attrition',
                         'x': 0.50, 'y': 0.92,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_traces(texttemplate='%{text:.2}%', textposition='outside')
fig.update_xaxes(title_text='Percent', range=[0, 25], row=2, col=1)
for row in (1, 2):
    fig.update_yaxes(title_text='Age', row=row, col=1)
fig.show()
# Share of each Education x Age band x Attrition combination, as a percentage
# of all counted rows.
bins = pd.IntervalIndex.from_tuples([(0, 25), (25, 40), (40, 45), (45, 60)])
# Copy first: writing the binned Age onto a column selection of Data is a
# chained assignment (SettingWithCopyWarning, hidden here by the warning filter).
Temp = Data[['Education', 'Age', 'Attrition']].copy()
Temp['Age'] = pd.cut(Temp['Age'], bins)
Temp = (Temp.groupby(['Education', 'Age', 'Attrition'])['Attrition']
            .agg(['count'])
            .rename(columns={'count': 'Count'}))
Temp['Percentage'] = np.round(100 * Temp['Count'] / Temp['Count'].sum(), 2)
Temp = Temp.reset_index()
Temp['Education Code'] = Temp['Education']  # numeric code kept for ordinal sorting
Temp['Education'] = Temp['Education'].replace(Categorical_Dict['Education'])
# Drop empty combinations and sort into a new frame (no in-place edits of a
# filtered selection).
Temp = Temp[Temp.Percentage != 0].sort_values(by=['Education Code', 'Age'])
Temp['Age'] = Temp['Age'].astype(str)

# sns.set_palette() returns None, so the original `palette=sns.set_palette(...)`
# passed no palette at all and the colours depended on which levels happened to
# be present in each panel. Use one fixed colour per education level and one
# hue order for both panels, because they share a single legend.
education_levels = list(Categorical_Dict['Education'].values())
education_palette = dict(zip(education_levels, Ed_Colors))
education_present = set(Temp['Education'])
education_order = [level for level in education_levels if level in education_present]
sns.set_palette(Ed_Colors)  # kept: the original call also changed the default palette

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(13.5, 6), sharey=True)
bar_style = dict(x='Age', y='Percentage', hue='Education', hue_order=education_order,
                 palette=education_palette, edgecolor=Ed_LC)
sns.barplot(ax=ax[0], data=Temp[Temp.Attrition == 'No'], **bar_style)
ax[0].set_title('Attrition: No', fontsize=16)
ax[0].set_ylim([0, 20])
ax[0].legend().set_visible(False)  # the legend is drawn once, next to the right panel
sns.barplot(ax=ax[1], data=Temp[Temp.Attrition == 'Yes'], **bar_style)
ax[1].set_title('Attrition: Yes', fontsize=16)
ax[1].legend(bbox_to_anchor=(1, 0.55), loc='center left', fontsize=14)
ax[1].set_ylabel(None)
for a in ax:
    a.tick_params(labelsize=14)
plt.subplots_adjust(wspace=0.05)
# Share of each Gender x Business Travel x Attrition combination, as a
# percentage of all counted rows.
Temp = (Data.groupby(['Gender', 'Business Travel', 'Attrition'])['Attrition']
            .agg(['count'])
            .rename(columns={'count': 'Count'}))
Temp['Percentage'] = np.round(100 * Temp['Count'] / Temp['Count'].sum(), 2)
Temp = Temp.reset_index().sort_values(by=['Business Travel'])

# Two stacked panels (Attrition No / Yes) sharing the percentage axis.
fig = make_subplots(rows=2, cols=1, vertical_spacing=0.08, shared_xaxes=True,
                    subplot_titles=('Attrition: No', 'Attrition: Yes'))
bar_style = dict(y='Business Travel', x='Percentage', orientation='h', color='Gender',
                 text='Percentage', hover_data=Temp.columns,
                 color_discrete_sequence=MF_Colors)
fig1 = px.bar(Temp[Temp['Attrition'] == 'No'], **bar_style)   # top panel
fig2 = px.bar(Temp[Temp['Attrition'] == 'Yes'], **bar_style)  # bottom panel
for row, panel in enumerate((fig1, fig2), start=1):
    # Move the first two traces of the panel into the shared figure.
    fig.add_trace(panel.data[0], row=row, col=1)
    fig.add_trace(panel.data[1], row=row, col=1)
    fig.update_traces(marker_line_color=MF_LC, marker_line_width=1, opacity=1, row=row, col=1)
# Legend entries are shown for the top panel only.
fig.update_traces(showlegend=False, row=2, col=1)

fig.update_layout(height=500, plot_bgcolor='white', legend_orientation='h',
                  title={'text': 'Business Travel Distribution by Gender and Attrition',
                         'x': 0.50, 'y': 0.92,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_traces(texttemplate='%{text:.2}%', textposition='outside')
fig.update_xaxes(title_text='Percent', range=[0, 40], row=2, col=1)
for row in (1, 2):
    fig.update_yaxes(title_text='Business Travel', row=row, col=1)
fig.show()
# Share of each Gender x Department x Attrition combination, as a percentage of
# all counted rows.
Temp = (Data.groupby(['Gender', 'Department', 'Attrition'])['Attrition']
            .agg(['count'])
            .rename(columns={'count': 'Count'}))
Temp['Percentage'] = np.round(100 * Temp['Count'] / Temp['Count'].sum(), 2)
Temp = Temp.reset_index().sort_values(by=['Department'])

# Two stacked panels (Attrition No / Yes) sharing the percentage axis.
fig = make_subplots(rows=2, cols=1, vertical_spacing=0.08, shared_xaxes=True,
                    subplot_titles=('Attrition: No', 'Attrition: Yes'))
bar_style = dict(y='Department', x='Percentage', orientation='h', color='Gender',
                 text='Percentage', hover_data=Temp.columns,
                 color_discrete_sequence=MF_Colors)
fig1 = px.bar(Temp[Temp['Attrition'] == 'No'], **bar_style)   # top panel
fig2 = px.bar(Temp[Temp['Attrition'] == 'Yes'], **bar_style)  # bottom panel
for row, panel in enumerate((fig1, fig2), start=1):
    # Move the first two traces of the panel into the shared figure.
    fig.add_trace(panel.data[0], row=row, col=1)
    fig.add_trace(panel.data[1], row=row, col=1)
    fig.update_traces(marker_line_color=MF_LC, marker_line_width=1, opacity=1, row=row, col=1)
# Legend entries are shown for the top panel only.
fig.update_traces(showlegend=False, row=2, col=1)

fig.update_layout(height=500, plot_bgcolor='white', legend_orientation='h',
                  title={'text': 'Department Distribution by Gender and Attrition',
                         'x': 0.50, 'y': 0.92,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_traces(texttemplate='%{text:.2}%', textposition='outside')
fig.update_xaxes(title_text='Percent', range=[0, 35], row=2, col=1)
for row in (1, 2):
    fig.update_yaxes(title_text='Department', row=row, col=1)
fig.show()
# Share of each Education x Department x Attrition combination, as a percentage
# of all counted rows.
Temp = (Data[['Education', 'Department', 'Attrition']]
            .groupby(['Education', 'Department', 'Attrition'])['Attrition']
            .agg(['count'])
            .rename(columns={'count': 'Count'}))
Temp['Percentage'] = np.round(100 * Temp['Count'] / Temp['Count'].sum(), 2)
Temp = Temp.reset_index()
Temp['Education Code'] = Temp['Education']  # numeric code kept for ordinal sorting
Temp['Education'] = Temp['Education'].replace(Categorical_Dict['Education'])
# Drop empty combinations and sort into a new frame; the original sorted and
# edited the filtered selection in place (SettingWithCopyWarning).
Temp = Temp[Temp.Percentage != 0].sort_values(by=['Education Code', 'Department'])
# Break long department names over several lines so the tick labels fit.
Temp['Department'] = Temp['Department'].map(lambda x: x.replace('&', 'and').replace(' ', '\n'))

# sns.set_palette() returns None, so the original `palette=sns.set_palette(...)`
# passed no palette at all and the colours depended on which levels happened to
# be present in each panel. Use one fixed colour per education level and one
# hue order for both panels, because they share a single legend.
education_levels = list(Categorical_Dict['Education'].values())
education_palette = dict(zip(education_levels, Ed_Colors))
education_present = set(Temp['Education'])
education_order = [level for level in education_levels if level in education_present]
sns.set_palette(Ed_Colors)  # kept: the original call also changed the default palette

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(13.5, 6), sharey=True)
bar_style = dict(x='Department', y='Percentage', hue='Education', hue_order=education_order,
                 palette=education_palette, edgecolor=Ed_LC)
sns.barplot(ax=ax[0], data=Temp[Temp.Attrition == 'No'], **bar_style)
ax[0].set_title('Attrition: No', fontsize=16)
ax[0].set_ylim([0, 25])
ax[0].legend().set_visible(False)  # the legend is drawn once, next to the right panel
sns.barplot(ax=ax[1], data=Temp[Temp.Attrition == 'Yes'], **bar_style)
ax[1].set_title('Attrition: Yes', fontsize=16)
ax[1].legend(bbox_to_anchor=(1, 0.55), loc='center left', fontsize=14)
ax[1].set_ylabel(None)
for a in ax:
    a.tick_params(labelsize=14)
plt.subplots_adjust(wspace=0.05)
# Histogram of Distance From Home split by Attrition, with a box plot as marginal.
# The copy-pasted 'Credit Distribution Histogram' title was dropped: it was
# overwritten by the real title before the figure was shown.
fig = px.histogram(Data, x='Distance From Home', color='Attrition', marginal='box',
                   color_discrete_sequence=Att_Colors, hover_data=Data.columns)
fig.update_traces(marker_line_color=Att_LC, marker_line_width=0.5, opacity=1)
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
# Fixed range for the primary (count) axis.
fig['layout']['yaxis'].update(range=[0, 250])
fig.update_layout(plot_bgcolor='white',
                  yaxis_title='Frequency',
                  title={'text': 'Distance From Home Distribution by Attrition',
                         'x': 0.46, 'y': 0.95,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()
# Share of each Gender x Distance band x Attrition combination, as a percentage
# of all counted rows.
bins = pd.IntervalIndex.from_tuples([(0, 5), (5, 10), (10, 20), (20, 30)])
# Work on a copy: assigning the binned distance onto a column selection of Data
# is a chained assignment (SettingWithCopyWarning, hidden by the warning filter).
Temp = Data[['Gender', 'Distance From Home', 'Attrition']].copy()
Temp['Distance From Home'] = pd.cut(Temp['Distance From Home'], bins)
Temp = (Temp.groupby(['Gender', 'Distance From Home', 'Attrition'])['Attrition']
            .agg(['count'])
            .rename(columns={'count': 'Count'}))
Temp['Percentage'] = np.round(100 * Temp['Count'] / Temp['Count'].sum(), 2)
Temp = Temp.reset_index().sort_values(by=['Distance From Home'])
Temp['Distance From Home'] = Temp['Distance From Home'].astype(str)  # interval labels as text

# Two stacked panels (Attrition No / Yes) sharing the percentage axis.
fig = make_subplots(rows=2, cols=1, vertical_spacing=0.08, shared_xaxes=True,
                    subplot_titles=('Attrition: No', 'Attrition: Yes'))
bar_style = dict(y='Distance From Home', x='Percentage', orientation='h', color='Gender',
                 text='Percentage', hover_data=Temp.columns,
                 color_discrete_sequence=MF_Colors)
fig1 = px.bar(Temp.loc[Temp.Attrition == 'No'], **bar_style)   # top panel
fig2 = px.bar(Temp.loc[Temp.Attrition == 'Yes'], **bar_style)  # bottom panel
for row, panel in enumerate((fig1, fig2), start=1):
    # Move the first two traces of the panel into the shared figure.
    fig.add_trace(panel.data[0], row=row, col=1)
    fig.add_trace(panel.data[1], row=row, col=1)
    fig.update_traces(marker_line_color=MF_LC, marker_line_width=1, opacity=1, row=row, col=1)
# Legend entries are shown for the top panel only.
fig.update_traces(showlegend=False, row=2, col=1)

fig.update_layout(height=550, plot_bgcolor='white', legend_orientation='h',
                  title={'text': 'Distance From Home Distribution by Gender and Attrition',
                         'x': 0.50, 'y': 0.92,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_traces(texttemplate='%{text:.2}%', textposition='outside')
fig.update_xaxes(title_text='Percent', range=[0, 25], row=2, col=1)
for row in (1, 2):
    fig.update_yaxes(title_text='Distance From Home', row=row, col=1)
fig.show()
# Share of each Gender x Education x Attrition combination, as a percentage of
# all counted rows.
# Group and sort on the numeric education codes and translate them to labels
# only afterwards: sorting the labels (as the original did) orders the levels
# alphabetically instead of from 'Below College' to 'Doctor'. Grouping Data
# directly also avoids assigning onto a column selection of Data.
Temp = (Data.groupby(['Gender', 'Education', 'Attrition'])['Attrition']
            .agg(['count'])
            .rename(columns={'count': 'Count'}))
Temp['Percentage'] = np.round(100 * Temp['Count'] / Temp['Count'].sum(), 2)
Temp = Temp.reset_index().sort_values(by=['Education'])
Temp['Education'] = Temp['Education'].replace(Categorical_Dict['Education'])

# Two stacked panels (Attrition No / Yes) sharing the percentage axis.
fig = make_subplots(rows=2, cols=1, vertical_spacing=0.05, shared_xaxes=True,
                    subplot_titles=('Attrition: No', 'Attrition: Yes'))
bar_style = dict(y='Education', x='Percentage', orientation='h', color='Gender',
                 text='Percentage', hover_data=Temp.columns,
                 color_discrete_sequence=MF_Colors)
fig1 = px.bar(Temp.loc[Temp.Attrition == 'No'], **bar_style)   # top panel
fig2 = px.bar(Temp.loc[Temp.Attrition == 'Yes'], **bar_style)  # bottom panel
for row, panel in enumerate((fig1, fig2), start=1):
    # Move the first two traces of the panel into the shared figure.
    fig.add_trace(panel.data[0], row=row, col=1)
    fig.add_trace(panel.data[1], row=row, col=1)
    fig.update_traces(marker_line_color=MF_LC, marker_line_width=1, opacity=1, row=row, col=1)
# Legend entries are shown for the top panel only.
fig.update_traces(showlegend=False, row=2, col=1)

fig.update_layout(height=700, plot_bgcolor='white', legend_orientation='h',
                  title={'text': 'Education Distribution by Gender and Attrition',
                         'x': 0.50, 'y': 0.92,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_traces(texttemplate='%{text:.2}%', textposition='outside')
fig.update_xaxes(title_text='Percent', range=[0, 20], row=2, col=1)
for row in (1, 2):
    fig.update_yaxes(title_text='Education', row=row, col=1)
fig.show()
# Share of each Gender x Education Field x Attrition combination, as a
# percentage of all counted rows.
Temp = (Data.groupby(['Gender', 'Education Field', 'Attrition'])['Attrition']
            .agg(['count'])
            .rename(columns={'count': 'Count'}))
Temp['Percentage'] = np.round(100 * Temp['Count'] / Temp['Count'].sum(), 2)
Temp = Temp.reset_index().sort_values(by=['Education Field'])

# Two stacked panels (Attrition No / Yes) sharing the percentage axis.
fig = make_subplots(rows=2, cols=1, vertical_spacing=0.05, shared_xaxes=True,
                    subplot_titles=('Attrition: No', 'Attrition: Yes'))
bar_style = dict(y='Education Field', x='Percentage', orientation='h', color='Gender',
                 text='Percentage', hover_data=Temp.columns,
                 color_discrete_sequence=MF_Colors)
fig1 = px.bar(Temp[Temp['Attrition'] == 'No'], **bar_style)   # top panel
fig2 = px.bar(Temp[Temp['Attrition'] == 'Yes'], **bar_style)  # bottom panel
for row, panel in enumerate((fig1, fig2), start=1):
    # Move the first two traces of the panel into the shared figure.
    fig.add_trace(panel.data[0], row=row, col=1)
    fig.add_trace(panel.data[1], row=row, col=1)
    fig.update_traces(marker_line_color=MF_LC, marker_line_width=1, opacity=1, row=row, col=1)
# Legend entries are shown for the top panel only.
fig.update_traces(showlegend=False, row=2, col=1)

fig.update_layout(height=700, plot_bgcolor='white', legend_orientation='h',
                  title={'text': 'Education Field Distribution by Gender and Attrition',
                         'x': 0.50, 'y': 0.92,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_traces(texttemplate='%{text:.2}%', textposition='outside')
fig.update_xaxes(title_text='Percent', range=[0, 25], row=2, col=1)
for row in (1, 2):
    fig.update_yaxes(title_text='Education Field', row=row, col=1)
fig.show()
# Share of each Education x Education Field x Attrition combination, as a
# percentage of all counted rows.
Temp = (Data[['Education', 'Education Field', 'Attrition']]
            .groupby(['Education', 'Education Field', 'Attrition'])['Attrition']
            .agg(['count'])
            .rename(columns={'count': 'Count'}))
Temp['Percentage'] = np.round(100 * Temp['Count'] / Temp['Count'].sum(), 2)
Temp = Temp.reset_index()
Temp['Education Code'] = Temp['Education']  # numeric code kept for ordinal sorting
Temp['Education'] = Temp['Education'].replace(Categorical_Dict['Education'])
# Drop empty combinations and sort into a new frame; the original sorted and
# edited the filtered selection in place (SettingWithCopyWarning).
Temp = Temp[Temp.Percentage != 0].sort_values(by=['Education Code', 'Education Field'])
# Break long field names over several lines so the tick labels fit.
Temp['Education Field'] = Temp['Education Field'].map(lambda x: x.replace('&', 'and').replace(' ', '\n'))

# sns.set_palette() returns None, so the original `palette=sns.set_palette(...)`
# passed no palette at all and the colours depended on which levels happened to
# be present in each panel. Use one fixed colour per education level and one
# hue order for both panels, because they share a single legend.
education_levels = list(Categorical_Dict['Education'].values())
education_palette = dict(zip(education_levels, Ed_Colors))
education_present = set(Temp['Education'])
education_order = [level for level in education_levels if level in education_present]
sns.set_palette(Ed_Colors)  # kept: the original call also changed the default palette

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(14, 6), sharey=True)
bar_style = dict(x='Education Field', y='Percentage', hue='Education', hue_order=education_order,
                 palette=education_palette, edgecolor=Ed_LC)
sns.barplot(ax=ax[0], data=Temp[Temp.Attrition == 'No'], **bar_style)
ax[0].set_title('Attrition: No', fontsize=16)
ax[0].set_ylim([0, 14])
ax[0].legend().set_visible(False)  # the legend is drawn once, next to the right panel
sns.barplot(ax=ax[1], data=Temp[Temp.Attrition == 'Yes'], **bar_style)
ax[1].set_title('Attrition: Yes', fontsize=16)
ax[1].legend(bbox_to_anchor=(1, 0.55), loc='center left', fontsize=14)
ax[1].set_ylabel(None)
for a in ax:
    a.tick_params(labelsize=14)
plt.subplots_adjust(wspace=0.05)
# Share of each Gender x Environment Satisfaction x Attrition combination, as a
# percentage of all counted rows.
# Group and sort on the numeric satisfaction codes and translate them to labels
# only afterwards: sorting the labels (as the original did) orders the levels
# alphabetically instead of from 'Low' to 'Very High'. Grouping Data directly
# also avoids assigning onto a column selection of Data.
Temp = (Data.groupby(['Gender', 'Environment Satisfaction', 'Attrition'])['Attrition']
            .agg(['count'])
            .rename(columns={'count': 'Count'}))
Temp['Percentage'] = np.round(100 * Temp['Count'] / Temp['Count'].sum(), 2)
Temp = Temp.reset_index().sort_values(by=['Environment Satisfaction'])
Temp['Environment Satisfaction'] = Temp['Environment Satisfaction'].replace(
    Categorical_Dict['Environment Satisfaction'])

# Two stacked panels (Attrition No / Yes) sharing the percentage axis.
fig = make_subplots(rows=2, cols=1, vertical_spacing=0.05, shared_xaxes=True,
                    subplot_titles=('Attrition: No', 'Attrition: Yes'))
bar_style = dict(y='Environment Satisfaction', x='Percentage', orientation='h', color='Gender',
                 text='Percentage', hover_data=Temp.columns,
                 color_discrete_sequence=MF_Colors)
fig1 = px.bar(Temp.loc[Temp.Attrition == 'No'], **bar_style)   # top panel
fig2 = px.bar(Temp.loc[Temp.Attrition == 'Yes'], **bar_style)  # bottom panel
for row, panel in enumerate((fig1, fig2), start=1):
    # Move the first two traces of the panel into the shared figure.
    fig.add_trace(panel.data[0], row=row, col=1)
    fig.add_trace(panel.data[1], row=row, col=1)
    fig.update_traces(marker_line_color=MF_LC, marker_line_width=1, opacity=1, row=row, col=1)
# Legend entries are shown for the top panel only.
fig.update_traces(showlegend=False, row=2, col=1)

fig.update_layout(height=600, plot_bgcolor='white', legend_orientation='h',
                  title={'text': 'Environment Satisfaction Distribution by Gender and Attrition',
                         'x': 0.50, 'y': 0.92,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_traces(texttemplate='%{text:.2}%', textposition='outside')
fig.update_xaxes(title_text='Percent', range=[0, 20], row=2, col=1)
for row in (1, 2):
    fig.update_yaxes(title_text='Environment Satisfaction', row=row, col=1)
fig.show()
# Histogram of Hourly Rate split by Attrition, with a box plot as marginal.
# The copy-pasted 'Credit Distribution Histogram' title was dropped: it was
# overwritten by the real title before the figure was shown.
fig = px.histogram(Data, x='Hourly Rate', color='Attrition', marginal='box',
                   color_discrete_sequence=Att_Colors, hover_data=Data.columns)
fig.update_traces(marker_line_color=Att_LC, marker_line_width=0.5, opacity=1)
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
# Fixed range for the primary (count) axis.
fig['layout']['yaxis'].update(range=[0, 140])
fig.update_layout(plot_bgcolor='white',
                  yaxis_title='Frequency',
                  title={'text': 'Hourly Rate Distribution by Attrition',
                         'x': 0.46, 'y': 0.95,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()
# Share of each Gender x Hourly Rate band x Attrition combination, as a
# percentage of all counted rows.
bins = pd.IntervalIndex.from_tuples([(25, 50), (50, 75), (75, 100)])
# Work on a copy: assigning the binned rate onto a column selection of Data is
# a chained assignment (SettingWithCopyWarning, hidden by the warning filter).
Temp = Data[['Gender', 'Hourly Rate', 'Attrition']].copy()
Temp['Hourly Rate'] = pd.cut(Temp['Hourly Rate'], bins)
Temp = (Temp.groupby(['Gender', 'Hourly Rate', 'Attrition'])['Attrition']
            .agg(['count'])
            .rename(columns={'count': 'Count'}))
Temp['Percentage'] = np.round(100 * Temp['Count'] / Temp['Count'].sum(), 2)
Temp = Temp.reset_index().sort_values(by=['Hourly Rate'])
Temp['Hourly Rate'] = Temp['Hourly Rate'].astype(str)  # interval labels as text for the axis

# Two stacked panels (Attrition No / Yes) sharing the percentage axis.
fig = make_subplots(rows=2, cols=1, vertical_spacing=0.05, shared_xaxes=True,
                    subplot_titles=('Attrition: No', 'Attrition: Yes'))
bar_style = dict(y='Hourly Rate', x='Percentage', orientation='h', color='Gender',
                 text='Percentage', hover_data=Temp.columns,
                 color_discrete_sequence=MF_Colors)
fig1 = px.bar(Temp.loc[Temp.Attrition == 'No'], **bar_style)   # top panel
fig2 = px.bar(Temp.loc[Temp.Attrition == 'Yes'], **bar_style)  # bottom panel
for row, panel in enumerate((fig1, fig2), start=1):
    # Move the first two traces of the panel into the shared figure.
    fig.add_trace(panel.data[0], row=row, col=1)
    fig.add_trace(panel.data[1], row=row, col=1)
    fig.update_traces(marker_line_color=MF_LC, marker_line_width=1, opacity=1, row=row, col=1)
# Legend entries are shown for the top panel only.
fig.update_traces(showlegend=False, row=2, col=1)

fig.update_layout(height=500, plot_bgcolor='white', legend_orientation='h',
                  title={'text': 'Hourly Rate Distribution by Gender and Attrition',
                         'x': 0.50, 'y': 0.92,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_traces(texttemplate='%{text:.2}%', textposition='outside')
fig.update_xaxes(title_text='Percent', range=[0, 20], row=2, col=1)
for row in (1, 2):
    fig.update_yaxes(title_text='Hourly Rate', row=row, col=1)
fig.show()
# Share of each Education x Hourly Rate band x Attrition combination, as a
# percentage of all counted rows.
bins = pd.IntervalIndex.from_tuples([(25, 50), (50, 75), (75, 100)])
# Copy first: writing the binned rate onto a column selection of Data is a
# chained assignment (SettingWithCopyWarning, hidden here by the warning filter).
Temp = Data[['Education', 'Hourly Rate', 'Attrition']].copy()
Temp['Hourly Rate'] = pd.cut(Temp['Hourly Rate'], bins)
Temp = (Temp.groupby(['Education', 'Hourly Rate', 'Attrition'])['Attrition']
            .agg(['count'])
            .rename(columns={'count': 'Count'}))
Temp['Percentage'] = np.round(100 * Temp['Count'] / Temp['Count'].sum(), 2)
Temp = Temp.reset_index()
Temp['Education Code'] = Temp['Education']  # numeric code kept for ordinal sorting
Temp['Education'] = Temp['Education'].replace(Categorical_Dict['Education'])
# Drop empty combinations and sort into a new frame (no in-place edits of a
# filtered selection).
Temp = Temp[Temp.Percentage != 0].sort_values(by=['Education Code', 'Hourly Rate'])
Temp['Hourly Rate'] = Temp['Hourly Rate'].astype(str)

# sns.set_palette() returns None, so the original `palette=sns.set_palette(...)`
# passed no palette at all and the colours depended on which levels happened to
# be present in each panel. Use one fixed colour per education level and one
# hue order for both panels, because they share a single legend.
education_levels = list(Categorical_Dict['Education'].values())
education_palette = dict(zip(education_levels, Ed_Colors))
education_present = set(Temp['Education'])
education_order = [level for level in education_levels if level in education_present]
sns.set_palette(Ed_Colors)  # kept: the original call also changed the default palette

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(14, 6), sharey=True)
bar_style = dict(x='Hourly Rate', y='Percentage', hue='Education', hue_order=education_order,
                 palette=education_palette, edgecolor=Ed_LC)
sns.barplot(ax=ax[0], data=Temp[Temp.Attrition == 'No'], **bar_style)
ax[0].set_title('Attrition: No', fontsize=16)
ax[0].set_ylim([0, 14])
ax[0].legend().set_visible(False)  # the legend is drawn once, next to the right panel
sns.barplot(ax=ax[1], data=Temp[Temp.Attrition == 'Yes'], **bar_style)
ax[1].set_title('Attrition: Yes', fontsize=16)
ax[1].legend(bbox_to_anchor=(1, 0.55), loc='center left', fontsize=14)
ax[1].set_ylabel(None)
for a in ax:
    a.tick_params(labelsize=14)
plt.subplots_adjust(wspace=0.05)
# Share of each Gender x Job Involvement x Attrition combination, as a
# percentage of all counted rows.
# Group and sort on the numeric involvement codes and translate them to labels
# only afterwards: sorting the labels (as the original did) orders the levels
# alphabetically instead of from 'Low' to 'Very High'. Grouping Data directly
# also avoids assigning onto a column selection of Data.
Temp = (Data.groupby(['Gender', 'Job Involvement', 'Attrition'])['Attrition']
            .agg(['count'])
            .rename(columns={'count': 'Count'}))
Temp['Percentage'] = np.round(100 * Temp['Count'] / Temp['Count'].sum(), 2)
Temp = Temp.reset_index().sort_values(by=['Job Involvement'])
Temp['Job Involvement'] = Temp['Job Involvement'].replace(Categorical_Dict['Job Involvement'])

# Two stacked panels (Attrition No / Yes) sharing the percentage axis.
fig = make_subplots(rows=2, cols=1, vertical_spacing=0.05, shared_xaxes=True,
                    subplot_titles=('Attrition: No', 'Attrition: Yes'))
bar_style = dict(y='Job Involvement', x='Percentage', orientation='h', color='Gender',
                 text='Percentage', hover_data=Temp.columns,
                 color_discrete_sequence=MF_Colors)
fig1 = px.bar(Temp.loc[Temp.Attrition == 'No'], **bar_style)   # top panel
fig2 = px.bar(Temp.loc[Temp.Attrition == 'Yes'], **bar_style)  # bottom panel
for row, panel in enumerate((fig1, fig2), start=1):
    # Move the first two traces of the panel into the shared figure.
    fig.add_trace(panel.data[0], row=row, col=1)
    fig.add_trace(panel.data[1], row=row, col=1)
    fig.update_traces(marker_line_color=MF_LC, marker_line_width=1, opacity=1, row=row, col=1)
# Legend entries are shown for the top panel only.
fig.update_traces(showlegend=False, row=2, col=1)

fig.update_layout(height=600, plot_bgcolor='white', legend_orientation='h',
                  title={'text': 'Job Involvement Distribution by Gender and Attrition',
                         'x': 0.50, 'y': 0.92,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_traces(texttemplate='%{text:.2}%', textposition='outside')
fig.update_xaxes(title_text='Percent', range=[0, 20], row=2, col=1)
for row in (1, 2):
    fig.update_yaxes(title_text='Job Involvement', row=row, col=1)
fig.show()
# Share of each Gender x Job Level x Attrition combination, as a percentage of
# all counted rows.
Temp = (Data[['Gender', 'Job Level', 'Attrition']]
            .groupby(['Gender', 'Job Level', 'Attrition'])['Attrition']
            .agg(['count'])
            .rename(columns={'count': 'Count'}))
Temp['Percentage'] = np.round(100 * Temp['Count'] / Temp['Count'].sum(), 2)
Temp = Temp.reset_index().sort_values(by=['Job Level'])

# Two stacked panels (Attrition No / Yes) sharing the percentage axis.
fig = make_subplots(rows=2, cols=1, vertical_spacing=0.05, shared_xaxes=True,
                    subplot_titles=('Attrition: No', 'Attrition: Yes'))
bar_style = dict(y='Job Level', x='Percentage', orientation='h', color='Gender',
                 text='Percentage', hover_data=Temp.columns,
                 color_discrete_sequence=MF_Colors)
fig1 = px.bar(Temp[Temp['Attrition'] == 'No'], **bar_style)   # top panel
fig2 = px.bar(Temp[Temp['Attrition'] == 'Yes'], **bar_style)  # bottom panel
for row, panel in enumerate((fig1, fig2), start=1):
    # Move the first two traces of the panel into the shared figure.
    fig.add_trace(panel.data[0], row=row, col=1)
    fig.add_trace(panel.data[1], row=row, col=1)
    fig.update_traces(marker_line_color=MF_LC, marker_line_width=1, opacity=1, row=row, col=1)
# Legend entries are shown for the top panel only.
fig.update_traces(showlegend=False, row=2, col=1)

fig.update_layout(height=700, plot_bgcolor='white', legend_orientation='h',
                  title={'text': 'Job Level Distribution by Gender and Attrition',
                         'x': 0.50, 'y': 0.92,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_traces(texttemplate='%{text:.2}%', textposition='outside')
fig.update_xaxes(title_text='Percent', range=[0, 20], row=2, col=1)
for row in (1, 2):
    fig.update_yaxes(title_text='Job Level', row=row, col=1)
fig.show()
# Job Level by Education and Attrition (seaborn grouped bars).
# Count the rows of Data in every (Education, Job Level, Attrition) group and
# express each count as a percentage of the total count.
Temp = (
    Data[['Education', 'Job Level', 'Attrition']]
    .groupby(['Education', 'Job Level', 'Attrition'])['Attrition']
    .count()
    .to_frame('Count')
)
Temp['Percentage'] = np.round(100 * Temp['Count'] / Temp['Count'].sum(), 2)
Temp = Temp.reset_index()
# Keep the numeric code so rows can be ordered by education level rather
# than alphabetically by label.
Temp['Education Code'] = Temp['Education']
Temp['Education'] = Temp['Education'].replace(Categorical_Dict['Education'])
# Filter and sort in one expression so the result is a fresh frame (no
# in-place sort on a filtered slice).
Temp = Temp[Temp['Percentage'] != 0].sort_values(by=['Education Code', 'Job Level'])

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(14, 6), sharey=True)
# sns.set_palette returns None, so the previous `palette=sns.set_palette(...)`
# effectively passed palette=None and relied on this global side effect.
# Make the side effect explicit and let barplot pick up the active palette.
sns.set_palette(Ed_Colors)
for panel, answer in zip(ax, ('No', 'Yes')):
    _ = sns.barplot(ax=panel, x='Job Level', y='Percentage', hue='Education',
                    data=Temp[Temp['Attrition'] == answer], edgecolor=Ed_LC)
    _ = panel.set_title('Attrition: ' + answer, fontsize=16)
    _ = panel.tick_params(labelsize=14)
_ = ax[0].set_ylim([0, 12])
# Only the right-hand panel shows the legend, placed outside the axes.
_ = ax[0].legend().set_visible(False)
_ = ax[1].legend(bbox_to_anchor=(1, 0.55), loc='center left', fontsize=14)
_ = ax[1].set_ylabel(None)
plt.subplots_adjust(wspace=0.05)
# Job Role by Gender and Attrition.
# Count the rows of Data in every (Gender, Job Role, Attrition) group and
# express each count as a percentage of the total count.
Temp = (
    Data[['Gender', 'Job Role', 'Attrition']]
    .groupby(['Gender', 'Job Role', 'Attrition'])['Attrition']
    .count()
    .to_frame('Count')
)
Temp['Percentage'] = (100 * Temp['Count'] / Temp['Count'].sum()).round(2)
Temp = Temp.reset_index().sort_values(by=['Job Role'])

# Options shared by the two horizontal bar charts (one per Attrition value).
bar_options = dict(y='Job Role', x='Percentage', orientation='h', color='Gender',
                   text='Percentage', hover_data=Temp.columns,
                   color_discrete_sequence=MF_Colors)
fig = make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.05,
                    subplot_titles=('Attrition: No', 'Attrition: Yes'))

# Top panel: rows with Attrition == 'No'.
fig1 = px.bar(Temp[Temp['Attrition'] == 'No'], **bar_options)
for idx in (0, 1):
    fig.add_trace(fig1.data[idx], row=1, col=1)

# Bottom panel: rows with Attrition == 'Yes'.
fig2 = px.bar(Temp[Temp['Attrition'] == 'Yes'], **bar_options)
for idx in (0, 1):
    fig.add_trace(fig2.data[idx], row=2, col=1)

# Same bar styling on every trace; the bottom-panel traces are dropped from
# the legend so each Gender entry is listed only once.
fig.update_traces(marker_line_color=MF_LC, marker_line_width=1, opacity=1,
                  texttemplate='%{text:.2}%', textposition='outside')
fig.update_traces(showlegend=False, row=2, col=1)

# Axes: light-gray frame on both axes, vertical grid lines on x.
frame_style = dict(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='Lightgray', **frame_style)
fig.update_yaxes(**frame_style)
fig.update_xaxes(title_text='Percent', range=[0, 12], row=2, col=1)
for panel_row in (1, 2):
    fig.update_yaxes(title_text='Job Role', row=panel_row, col=1)

fig.update_layout(height=1000, plot_bgcolor='white', legend_orientation='h',
                  title={'text': 'Job Role Distribution by Gender and Attrition',
                         'x': 0.50, 'y': 0.92,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()
# Job Satisfaction by Gender and Attrition.
# Count the rows of Data in every (Gender, Job Satisfaction, Attrition) group
# and express each count as a percentage of the total count. Grouping is done
# on the numeric codes, which avoids assigning into a slice of Data.
Temp = (
    Data[['Gender', 'Job Satisfaction', 'Attrition']]
    .groupby(['Gender', 'Job Satisfaction', 'Attrition'])['Attrition']
    .count()
    .to_frame('Count')
)
Temp['Percentage'] = np.round(100 * Temp['Count'] / Temp['Count'].sum(), 2)
Temp = Temp.reset_index().sort_values(by=['Job Satisfaction'])
# Map codes to labels only after sorting: sorting the labels themselves would
# order the bars alphabetically instead of by satisfaction level.
Temp['Job Satisfaction'] = Temp['Job Satisfaction'].replace(Categorical_Dict['Job Satisfaction'])

# Options shared by the two horizontal bar charts (one per Attrition value).
bar_options = dict(y='Job Satisfaction', x='Percentage', orientation='h', color='Gender',
                   text='Percentage', hover_data=Temp.columns,
                   color_discrete_sequence=MF_Colors)
fig = make_subplots(rows=2, cols=1, vertical_spacing=0.05, shared_xaxes=True,
                    subplot_titles=('Attrition: No', 'Attrition: Yes'))

# Top panel: rows with Attrition == 'No'.
fig1 = px.bar(Temp.loc[Temp.Attrition == 'No'], **bar_options)
for trace in fig1.data:
    fig.add_trace(trace, row=1, col=1)
fig.update_traces(marker_line_color=MF_LC, marker_line_width=1, opacity=1, row=1, col=1)

# Bottom panel: rows with Attrition == 'Yes'; hidden from the legend so each
# Gender entry is listed only once.
fig2 = px.bar(Temp.loc[Temp.Attrition == 'Yes'], **bar_options)
for trace in fig2.data:
    fig.add_trace(trace, row=2, col=1)
fig.update_traces(marker_line_color=MF_LC, marker_line_width=1, opacity=1,
                  showlegend=False, row=2, col=1)

# Layout and axes.
fig.update_layout(height=600, plot_bgcolor='white', legend_orientation='h')
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_traces(texttemplate='%{text:.2}%', textposition='outside')
fig.update_xaxes(title_text='Percent', range=[0, 20], row=2, col=1)
fig.update_yaxes(title_text='Job Satisfaction', row=1, col=1)
fig.update_yaxes(title_text='Job Satisfaction', row=2, col=1)
fig.update_layout(title={'text': 'Job Satisfaction Distribution by Gender and Attrition',
                         'x': 0.50, 'y': 0.92,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()
# Job Satisfaction by Education and Attrition (seaborn grouped bars).
# Count the rows of Data in every (Education, Job Satisfaction, Attrition)
# group and express each count as a percentage of the total count.
Temp = (
    Data[['Education', 'Job Satisfaction', 'Attrition']]
    .groupby(['Education', 'Job Satisfaction', 'Attrition'])['Attrition']
    .count()
    .to_frame('Count')
)
Temp['Percentage'] = np.round(100 * Temp['Count'] / Temp['Count'].sum(), 2)
Temp = Temp.reset_index()
# .copy() so the column assignments below act on an independent frame, not
# on a filtered slice.
Temp = Temp[Temp['Percentage'] != 0].copy()
Temp['Education Code'] = Temp['Education']
# Sort while both columns still hold numeric codes so rows follow the
# ordinal levels rather than the alphabetical order of the labels.
Temp = Temp.sort_values(by=['Education Code', 'Job Satisfaction'])
Temp['Education'] = Temp['Education'].replace(Categorical_Dict['Education'])
Temp['Job Satisfaction'] = Temp['Job Satisfaction'].replace(Categorical_Dict['Job Satisfaction'])
# Fixed x-axis order (labels listed by ascending code) shared by both panels.
satisfaction_order = [Categorical_Dict['Job Satisfaction'][code]
                      for code in sorted(Categorical_Dict['Job Satisfaction'])]

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(14, 6), sharey=True)
# sns.set_palette returns None, so the previous `palette=sns.set_palette(...)`
# effectively passed palette=None and relied on this global side effect.
# Make the side effect explicit and let barplot pick up the active palette.
sns.set_palette(Ed_Colors)
for panel, answer in zip(ax, ('No', 'Yes')):
    _ = sns.barplot(ax=panel, x='Job Satisfaction', y='Percentage', hue='Education',
                    order=satisfaction_order,
                    data=Temp[Temp['Attrition'] == answer], edgecolor=Ed_LC)
    _ = panel.set_title('Attrition: ' + answer, fontsize=16)
    _ = panel.tick_params(labelsize=14)
_ = ax[0].set_ylim([0, 10])
# Only the right-hand panel shows the legend, placed outside the axes.
_ = ax[0].legend().set_visible(False)
_ = ax[1].legend(bbox_to_anchor=(1, 0.55), loc='center left', fontsize=14)
_ = ax[1].set_ylabel(None)
plt.subplots_adjust(wspace=0.05)
# Marital Status by Gender and Attrition.
# Count the rows of Data in every (Gender, Marital Status, Attrition) group
# and express each count as a percentage of the total count.
Temp = (
    Data[['Gender', 'Marital Status', 'Attrition']]
    .groupby(['Gender', 'Marital Status', 'Attrition'])['Attrition']
    .count()
    .to_frame('Count')
)
Temp['Percentage'] = (100 * Temp['Count'] / Temp['Count'].sum()).round(2)
Temp = Temp.reset_index().sort_values(by=['Marital Status'])

# Options shared by the two horizontal bar charts (one per Attrition value).
bar_options = dict(y='Marital Status', x='Percentage', orientation='h', color='Gender',
                   text='Percentage', hover_data=Temp.columns,
                   color_discrete_sequence=MF_Colors)
fig = make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.08,
                    subplot_titles=('Attrition: No', 'Attrition: Yes'))

# Top panel: rows with Attrition == 'No'.
fig1 = px.bar(Temp[Temp['Attrition'] == 'No'], **bar_options)
for idx in (0, 1):
    fig.add_trace(fig1.data[idx], row=1, col=1)

# Bottom panel: rows with Attrition == 'Yes'.
fig2 = px.bar(Temp[Temp['Attrition'] == 'Yes'], **bar_options)
for idx in (0, 1):
    fig.add_trace(fig2.data[idx], row=2, col=1)

# Same bar styling on every trace; the bottom-panel traces are dropped from
# the legend so each Gender entry is listed only once.
fig.update_traces(marker_line_color=MF_LC, marker_line_width=1, opacity=1,
                  texttemplate='%{text:.2}%', textposition='outside')
fig.update_traces(showlegend=False, row=2, col=1)

# Axes: light-gray frame on both axes, vertical grid lines on x.
frame_style = dict(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='Lightgray', **frame_style)
fig.update_yaxes(**frame_style)
fig.update_xaxes(title_text='Percent', range=[0, 25], row=2, col=1)
for panel_row in (1, 2):
    fig.update_yaxes(title_text='Marital Status', row=panel_row, col=1)

fig.update_layout(height=500, plot_bgcolor='white', legend_orientation='h',
                  title={'text': 'Marital Status Distribution by Gender and Attrition',
                         'x': 0.50, 'y': 0.92,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()
# Marital Status by Education and Attrition (seaborn grouped bars).
# Count the rows of Data in every (Education, Marital Status, Attrition)
# group and express each count as a percentage of the total count.
Temp = (
    Data[['Education', 'Marital Status', 'Attrition']]
    .groupby(['Education', 'Marital Status', 'Attrition'])['Attrition']
    .count()
    .to_frame('Count')
)
Temp['Percentage'] = np.round(100 * Temp['Count'] / Temp['Count'].sum(), 2)
Temp = Temp.reset_index()
# .copy() so the column assignments below act on an independent frame, not
# on a filtered slice.
Temp = Temp[Temp['Percentage'] != 0].copy()
# Keep the numeric code so rows can be ordered by education level rather
# than alphabetically by label.
Temp['Education Code'] = Temp['Education']
Temp['Education'] = Temp['Education'].replace(Categorical_Dict['Education'])
Temp = Temp.sort_values(by=['Education Code', 'Marital Status'])

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(13.5, 6), sharey=True)
# sns.set_palette returns None, so the previous `palette=sns.set_palette(...)`
# effectively passed palette=None and relied on this global side effect.
# Make the side effect explicit and let barplot pick up the active palette.
sns.set_palette(Ed_Colors)
for panel, answer in zip(ax, ('No', 'Yes')):
    _ = sns.barplot(ax=panel, x='Marital Status', y='Percentage', hue='Education',
                    data=Temp[Temp['Attrition'] == answer], edgecolor=Ed_LC)
    _ = panel.set_title('Attrition: ' + answer, fontsize=16)
    _ = panel.tick_params(labelsize=14)
_ = ax[0].set_ylim([0, 20])
# Only the right-hand panel shows the legend, placed outside the axes.
_ = ax[0].legend().set_visible(False)
_ = ax[1].legend(bbox_to_anchor=(1, 0.55), loc='center left', fontsize=14)
_ = ax[1].set_ylabel(None)
plt.subplots_adjust(wspace=0.05)
# Number Of Companies Worked by Gender and Attrition.
# Count the rows of Data in every (Gender, Number Of Companies Worked,
# Attrition) group and express each count as a percentage of the total count.
Temp = (
    Data[['Gender', 'Number Of Companies Worked', 'Attrition']]
    .groupby(['Gender', 'Number Of Companies Worked', 'Attrition'])['Attrition']
    .count()
    .to_frame('Count')
)
Temp['Percentage'] = (100 * Temp['Count'] / Temp['Count'].sum()).round(2)
Temp = Temp.reset_index().sort_values(by=['Number Of Companies Worked'])

# Options shared by the two horizontal bar charts (one per Attrition value).
bar_options = dict(y='Number Of Companies Worked', x='Percentage', orientation='h',
                   color='Gender', text='Percentage', hover_data=Temp.columns,
                   color_discrete_sequence=MF_Colors)
fig = make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.05,
                    subplot_titles=('Attrition: No', 'Attrition: Yes'))

# Top panel: rows with Attrition == 'No'.
fig1 = px.bar(Temp[Temp['Attrition'] == 'No'], **bar_options)
for idx in (0, 1):
    fig.add_trace(fig1.data[idx], row=1, col=1)

# Bottom panel: rows with Attrition == 'Yes'.
fig2 = px.bar(Temp[Temp['Attrition'] == 'Yes'], **bar_options)
for idx in (0, 1):
    fig.add_trace(fig2.data[idx], row=2, col=1)

# Same bar styling on every trace; the bottom-panel traces are dropped from
# the legend so each Gender entry is listed only once.
fig.update_traces(marker_line_color=MF_LC, marker_line_width=1, opacity=1,
                  texttemplate='%{text:.2}%', textposition='outside')
fig.update_traces(showlegend=False, row=2, col=1)

# Axes: light-gray frame on both axes, vertical grid lines on x.
frame_style = dict(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='Lightgray', **frame_style)
fig.update_yaxes(**frame_style)
fig.update_xaxes(title_text='Percent', range=[0, 20], row=2, col=1)
for panel_row in (1, 2):
    fig.update_yaxes(title_text='Number Of Companies Worked', row=panel_row, col=1)

fig.update_layout(height=1000, plot_bgcolor='white', legend_orientation='h',
                  title={'text': 'Number Of Companies Worked Distribution by Gender and Attrition',
                         'x': 0.50, 'y': 0.95,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()
# Over Time by Gender and Attrition.
# Count the rows of Data in every (Gender, Over Time, Attrition) group and
# express each count as a percentage of the total count.
Temp = (
    Data[['Gender', 'Over Time', 'Attrition']]
    .groupby(['Gender', 'Over Time', 'Attrition'])['Attrition']
    .count()
    .to_frame('Count')
)
Temp['Percentage'] = (100 * Temp['Count'] / Temp['Count'].sum()).round(2)
Temp = Temp.reset_index().sort_values(by=['Over Time'])

# Options shared by the two horizontal bar charts (one per Attrition value).
bar_options = dict(y='Over Time', x='Percentage', orientation='h', color='Gender',
                   text='Percentage', hover_data=Temp.columns,
                   color_discrete_sequence=MF_Colors)
fig = make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.08,
                    subplot_titles=('Attrition: No', 'Attrition: Yes'))

# Top panel: rows with Attrition == 'No'.
fig1 = px.bar(Temp[Temp['Attrition'] == 'No'], **bar_options)
for idx in (0, 1):
    fig.add_trace(fig1.data[idx], row=1, col=1)

# Bottom panel: rows with Attrition == 'Yes'.
fig2 = px.bar(Temp[Temp['Attrition'] == 'Yes'], **bar_options)
for idx in (0, 1):
    fig.add_trace(fig2.data[idx], row=2, col=1)

# Same bar styling on every trace (labels are drawn inside the bars here);
# the bottom-panel traces are dropped from the legend so each Gender entry
# is listed only once.
fig.update_traces(marker_line_color=MF_LC, marker_line_width=1, opacity=1,
                  texttemplate='%{text:.2}%', textposition='inside')
fig.update_traces(showlegend=False, row=2, col=1)

# Axes: light-gray frame on both axes, vertical grid lines on x.
frame_style = dict(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='Lightgray', **frame_style)
fig.update_yaxes(**frame_style)
fig.update_xaxes(title_text='Percent', range=[0, 40], row=2, col=1)
for panel_row in (1, 2):
    fig.update_yaxes(title_text='Over Time', row=panel_row, col=1)

fig.update_layout(height=450, plot_bgcolor='white', legend_orientation='h',
                  title={'text': 'Over Time Distribution by Gender and Attrition',
                         'x': 0.50, 'y': 0.92,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()
# Histogram of 'Percent Salary Hike' colored by Attrition, with a box-plot
# marginal above it.
fig = px.histogram(Data, x='Percent Salary Hike', color='Attrition', marginal='box',
                   color_discrete_sequence=Att_Colors, hover_data=Data.columns)
fig.update_traces(marker_line_color=Att_LC, marker_line_width=0.5, opacity=1)
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
# The stale placeholder title ('Credit Distribution Histogram') that used to
# be set here was always overwritten by the title below, so it is dropped.
# yaxis_range only touches the histogram's own y axis, not the marginal's.
fig.update_layout(plot_bgcolor='white',
                  yaxis_range=[0, 250],
                  yaxis_title='Frequency',
                  title={'text': 'Percent Salary Hike Distribution by Attrition',
                         'x': 0.46, 'y': 0.95,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()
# Percent Salary Hike (binned) by Gender and Attrition.
bins = pd.IntervalIndex.from_tuples([(10, 15), (15, 20), (20, 25)])
# .copy() so the binning below writes to an independent frame instead of a
# slice of Data.
Temp = Data[['Gender', 'Percent Salary Hike', 'Attrition']].copy()
Temp['Percent Salary Hike'] = pd.cut(Temp['Percent Salary Hike'], bins)
# observed=False is stated explicitly: with a categorical grouper, empty
# combinations are kept with a count of 0 (the behavior this cell already
# relied on through the default).
Temp = (
    Temp.groupby(['Gender', 'Percent Salary Hike', 'Attrition'], observed=False)['Attrition']
    .count()
    .to_frame('Count')
)
Temp['Percentage'] = np.round(100 * Temp['Count'] / Temp['Count'].sum(), 2)
Temp = Temp.reset_index().sort_values(by=['Percent Salary Hike'])
# Intervals are turned into strings after sorting so they can be used as
# axis categories.
Temp['Percent Salary Hike'] = Temp['Percent Salary Hike'].astype(str)

# Options shared by the two horizontal bar charts (one per Attrition value).
bar_options = dict(y='Percent Salary Hike', x='Percentage', orientation='h', color='Gender',
                   text='Percentage', hover_data=Temp.columns,
                   color_discrete_sequence=MF_Colors)
fig = make_subplots(rows=2, cols=1, vertical_spacing=0.08, shared_xaxes=True,
                    subplot_titles=('Attrition: No', 'Attrition: Yes'))

# Top panel: rows with Attrition == 'No'.
fig1 = px.bar(Temp.loc[Temp.Attrition == 'No'], **bar_options)
for trace in fig1.data:
    fig.add_trace(trace, row=1, col=1)
fig.update_traces(marker_line_color=MF_LC, marker_line_width=1, opacity=1, row=1, col=1)

# Bottom panel: rows with Attrition == 'Yes'; hidden from the legend so each
# Gender entry is listed only once.
fig2 = px.bar(Temp.loc[Temp.Attrition == 'Yes'], **bar_options)
for trace in fig2.data:
    fig.add_trace(trace, row=2, col=1)
fig.update_traces(marker_line_color=MF_LC, marker_line_width=1, opacity=1,
                  showlegend=False, row=2, col=1)

# Layout and axes.
fig.update_layout(height=500, plot_bgcolor='white', legend_orientation='h')
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_traces(texttemplate='%{text:.2}%', textposition='outside')
fig.update_xaxes(title_text='Percent', range=[0, 35], row=2, col=1)
fig.update_yaxes(title_text='Percent Salary Hike', row=1, col=1)
fig.update_yaxes(title_text='Percent Salary Hike', row=2, col=1)
fig.update_layout(title={'text': 'Percent Salary Hike Distribution by Gender and Attrition',
                         'x': 0.50, 'y': 0.92,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()
# Performance Rating by Gender and Attrition.
# Count the rows of Data in every (Gender, Performance Rating, Attrition)
# group and express each count as a percentage of the total count. Grouping
# is done on the numeric codes, which avoids assigning into a slice of Data.
Temp = (
    Data[['Gender', 'Performance Rating', 'Attrition']]
    .groupby(['Gender', 'Performance Rating', 'Attrition'])['Attrition']
    .count()
    .to_frame('Count')
)
Temp['Percentage'] = np.round(100 * Temp['Count'] / Temp['Count'].sum(), 2)
Temp = Temp.reset_index().sort_values(by=['Performance Rating'])
# Map codes to labels only after sorting so the bars follow the rating
# order rather than the alphabetical order of the labels.
Temp['Performance Rating'] = Temp['Performance Rating'].replace(Categorical_Dict['Performance Rating'])

# Options shared by the two horizontal bar charts (one per Attrition value).
bar_options = dict(y='Performance Rating', x='Percentage', orientation='h', color='Gender',
                   text='Percentage', hover_data=Temp.columns,
                   color_discrete_sequence=MF_Colors)
fig = make_subplots(rows=2, cols=1, vertical_spacing=0.08, shared_xaxes=True,
                    subplot_titles=('Attrition: No', 'Attrition: Yes'))

# Top panel: rows with Attrition == 'No'.
fig1 = px.bar(Temp.loc[Temp.Attrition == 'No'], **bar_options)
for trace in fig1.data:
    fig.add_trace(trace, row=1, col=1)
fig.update_traces(marker_line_color=MF_LC, marker_line_width=1, opacity=1, row=1, col=1)

# Bottom panel: rows with Attrition == 'Yes'; hidden from the legend so each
# Gender entry is listed only once.
fig2 = px.bar(Temp.loc[Temp.Attrition == 'Yes'], **bar_options)
for trace in fig2.data:
    fig.add_trace(trace, row=2, col=1)
fig.update_traces(marker_line_color=MF_LC, marker_line_width=1, opacity=1,
                  showlegend=False, row=2, col=1)

# Layout and axes.
fig.update_layout(height=420, plot_bgcolor='white', legend_orientation='h')
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_traces(texttemplate='%{text:.2}%', textposition='outside')
fig.update_xaxes(title_text='Percent', range=[0, 45], row=2, col=1)
# <br> breaks the axis title onto two lines.
fig.update_yaxes(title_text='Performance<br>Rating', row=1, col=1)
fig.update_yaxes(title_text='Performance<br>Rating', row=2, col=1)
fig.update_layout(title={'text': 'Performance Rating Distribution by Gender and Attrition',
                         'x': 0.50, 'y': 0.92,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()
# Relationship Satisfaction by Gender and Attrition.
# Count the rows of Data in every (Gender, Relationship Satisfaction,
# Attrition) group and express each count as a percentage of the total count.
# Grouping is done on the numeric codes, which avoids assigning into a slice
# of Data.
Temp = (
    Data[['Gender', 'Relationship Satisfaction', 'Attrition']]
    .groupby(['Gender', 'Relationship Satisfaction', 'Attrition'])['Attrition']
    .count()
    .to_frame('Count')
)
Temp['Percentage'] = np.round(100 * Temp['Count'] / Temp['Count'].sum(), 2)
Temp = Temp.reset_index().sort_values(by=['Relationship Satisfaction'])
# Map codes to labels only after sorting: sorting the labels themselves would
# order the bars alphabetically instead of by satisfaction level.
Temp['Relationship Satisfaction'] = Temp['Relationship Satisfaction'].replace(
    Categorical_Dict['Relationship Satisfaction'])

# Options shared by the two horizontal bar charts (one per Attrition value).
bar_options = dict(y='Relationship Satisfaction', x='Percentage', orientation='h',
                   color='Gender', text='Percentage', hover_data=Temp.columns,
                   color_discrete_sequence=MF_Colors)
fig = make_subplots(rows=2, cols=1, vertical_spacing=0.05, shared_xaxes=True,
                    subplot_titles=('Attrition: No', 'Attrition: Yes'))

# Top panel: rows with Attrition == 'No'.
fig1 = px.bar(Temp.loc[Temp.Attrition == 'No'], **bar_options)
for trace in fig1.data:
    fig.add_trace(trace, row=1, col=1)
fig.update_traces(marker_line_color=MF_LC, marker_line_width=1, opacity=1, row=1, col=1)

# Bottom panel: rows with Attrition == 'Yes'; hidden from the legend so each
# Gender entry is listed only once.
fig2 = px.bar(Temp.loc[Temp.Attrition == 'Yes'], **bar_options)
for trace in fig2.data:
    fig.add_trace(trace, row=2, col=1)
fig.update_traces(marker_line_color=MF_LC, marker_line_width=1, opacity=1,
                  showlegend=False, row=2, col=1)

# Layout and axes.
fig.update_layout(height=600, plot_bgcolor='white', legend_orientation='h')
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_traces(texttemplate='%{text:.2}%', textposition='outside')
fig.update_xaxes(title_text='Percent', range=[0, 20], row=2, col=1)
fig.update_yaxes(title_text='Relationship Satisfaction', row=1, col=1)
fig.update_yaxes(title_text='Relationship Satisfaction', row=2, col=1)
fig.update_layout(title={'text': 'Relationship Satisfaction Distribution by Gender and Attrition',
                         'x': 0.50, 'y': 0.92,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()
# Stock Option Level by Gender and Attrition.
# Count the rows of Data in every (Gender, Stock Option Level, Attrition)
# group and express each count as a percentage of the total count.
Temp = (
    Data[['Gender', 'Stock Option Level', 'Attrition']]
    .groupby(['Gender', 'Stock Option Level', 'Attrition'])['Attrition']
    .count()
    .to_frame('Count')
)
Temp['Percentage'] = (100 * Temp['Count'] / Temp['Count'].sum()).round(2)
Temp = Temp.reset_index().sort_values(by=['Stock Option Level'])

# Options shared by the two horizontal bar charts (one per Attrition value).
bar_options = dict(y='Stock Option Level', x='Percentage', orientation='h', color='Gender',
                   text='Percentage', hover_data=Temp.columns,
                   color_discrete_sequence=MF_Colors)
fig = make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.08,
                    subplot_titles=('Attrition: No', 'Attrition: Yes'))

# Top panel: rows with Attrition == 'No'.
fig1 = px.bar(Temp[Temp['Attrition'] == 'No'], **bar_options)
for idx in (0, 1):
    fig.add_trace(fig1.data[idx], row=1, col=1)

# Bottom panel: rows with Attrition == 'Yes'.
fig2 = px.bar(Temp[Temp['Attrition'] == 'Yes'], **bar_options)
for idx in (0, 1):
    fig.add_trace(fig2.data[idx], row=2, col=1)

# Same bar styling on every trace; the bottom-panel traces are dropped from
# the legend so each Gender entry is listed only once.
fig.update_traces(marker_line_color=MF_LC, marker_line_width=1, opacity=1,
                  texttemplate='%{text:.2}%', textposition='outside')
fig.update_traces(showlegend=False, row=2, col=1)

# Axes: light-gray frame on both axes, vertical grid lines on x.
frame_style = dict(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='Lightgray', **frame_style)
fig.update_yaxes(**frame_style)
fig.update_xaxes(title_text='Percent', range=[0, 25], row=2, col=1)
for panel_row in (1, 2):
    fig.update_yaxes(title_text='Stock Option Level', row=panel_row, col=1)

fig.update_layout(height=680, plot_bgcolor='white', legend_orientation='h',
                  title={'text': 'Stock Option Level Distribution by Gender and Attrition',
                         'x': 0.50, 'y': 0.92,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()
# Histogram of 'Total Working Years' colored by Attrition, with a box-plot
# marginal above it.
fig = px.histogram(Data, x='Total Working Years', color='Attrition', marginal='box',
                   color_discrete_sequence=Att_Colors, hover_data=Data.columns)
fig.update_traces(marker_line_color=Att_LC, marker_line_width=0.5, opacity=1)
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
# The stale placeholder title ('Credit Distribution Histogram') that used to
# be set here was always overwritten by the title below, so it is dropped.
# yaxis_range only touches the histogram's own y axis, not the marginal's.
fig.update_layout(plot_bgcolor='white',
                  yaxis_range=[0, 250],
                  yaxis_title='Frequency',
                  title={'text': 'Total Working Years Distribution by Attrition',
                         'x': 0.46, 'y': 0.95,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()
# Total Working Years (binned) by Gender and Attrition.
# The first bin starts at -1 so that a value of 0 falls inside (-1, 10].
bins = pd.IntervalIndex.from_tuples([(-1, 10), (10, 20), (20, 30), (30, 40)])
# .copy() so the binning below writes to an independent frame instead of a
# slice of Data.
Temp = Data[['Gender', 'Total Working Years', 'Attrition']].copy()
Temp['Total Working Years'] = pd.cut(Temp['Total Working Years'], bins)
# observed=False is stated explicitly: with a categorical grouper, empty
# combinations are kept with a count of 0 (the behavior this cell already
# relied on through the default).
Temp = (
    Temp.groupby(['Gender', 'Total Working Years', 'Attrition'], observed=False)['Attrition']
    .count()
    .to_frame('Count')
)
Temp['Percentage'] = np.round(100 * Temp['Count'] / Temp['Count'].sum(), 2)
Temp = Temp.reset_index().sort_values(by=['Total Working Years'])
# Intervals are turned into strings after sorting so they can be used as
# axis categories.
Temp['Total Working Years'] = Temp['Total Working Years'].astype(str)

# Options shared by the two horizontal bar charts (one per Attrition value).
bar_options = dict(y='Total Working Years', x='Percentage', orientation='h', color='Gender',
                   text='Percentage', hover_data=Temp.columns,
                   color_discrete_sequence=MF_Colors)
fig = make_subplots(rows=2, cols=1, vertical_spacing=0.08, shared_xaxes=True,
                    subplot_titles=('Attrition: No', 'Attrition: Yes'))

# Top panel: rows with Attrition == 'No'.
fig1 = px.bar(Temp.loc[Temp.Attrition == 'No'], **bar_options)
for trace in fig1.data:
    fig.add_trace(trace, row=1, col=1)
fig.update_traces(marker_line_color=MF_LC, marker_line_width=1, opacity=1, row=1, col=1)

# Bottom panel: rows with Attrition == 'Yes'; hidden from the legend so each
# Gender entry is listed only once.
fig2 = px.bar(Temp.loc[Temp.Attrition == 'Yes'], **bar_options)
for trace in fig2.data:
    fig.add_trace(trace, row=2, col=1)
fig.update_traces(marker_line_color=MF_LC, marker_line_width=1, opacity=1,
                  showlegend=False, row=2, col=1)

# Layout and axes.
fig.update_layout(height=600, plot_bgcolor='white', legend_orientation='h')
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_traces(texttemplate='%{text:.2}%', textposition='outside')
fig.update_xaxes(title_text='Percent', range=[0, 35], row=2, col=1)
fig.update_yaxes(title_text='Total Working Years', row=1, col=1)
fig.update_yaxes(title_text='Total Working Years', row=2, col=1)
fig.update_layout(title={'text': 'Total Working Years Distribution by Gender and Attrition',
                         'x': 0.50, 'y': 0.92,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()
# Histogram of 'Total Working Years' colored by Attrition, with a box-plot
# marginal above it.
# NOTE(review): this cell repeats the 'Total Working Years' histogram that
# already appears earlier in the file; possibly a different column was meant
# to be plotted here — confirm with the author.
fig = px.histogram(Data, x='Total Working Years', color='Attrition', marginal='box',
                   color_discrete_sequence=Att_Colors, hover_data=Data.columns)
fig.update_traces(marker_line_color=Att_LC, marker_line_width=0.5, opacity=1)
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
# The stale placeholder title ('Credit Distribution Histogram') that used to
# be set here was always overwritten by the title below, so it is dropped.
# yaxis_range only touches the histogram's own y axis, not the marginal's.
fig.update_layout(plot_bgcolor='white',
                  yaxis_range=[0, 250],
                  yaxis_title='Frequency',
                  title={'text': 'Total Working Years Distribution by Attrition',
                         'x': 0.46, 'y': 0.95,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()
# Work Life Balance by Gender and Attrition.
# Count the rows of Data in every (Gender, Work Life Balance, Attrition)
# group and express each count as a percentage of the total count. Grouping
# on the numeric codes avoids assigning into a slice of Data and keeps the
# groups in code order instead of the alphabetical order of the labels.
Temp = (
    Data[['Gender', 'Work Life Balance', 'Attrition']]
    .groupby(['Gender', 'Work Life Balance', 'Attrition'])['Attrition']
    .count()
    .to_frame('Count')
)
Temp['Percentage'] = np.round(100 * Temp['Count'] / Temp['Count'].sum(), 2)
Temp = Temp.reset_index()
# Codes are mapped to labels last, once the row order is fixed.
Temp['Work Life Balance'] = Temp['Work Life Balance'].replace(Categorical_Dict['Work Life Balance'])

# Options shared by the two horizontal bar charts (one per Attrition value).
bar_options = dict(y='Work Life Balance', x='Percentage', orientation='h', color='Gender',
                   text='Percentage', hover_data=Temp.columns,
                   color_discrete_sequence=MF_Colors)
fig = make_subplots(rows=2, cols=1, vertical_spacing=0.05, shared_xaxes=True,
                    subplot_titles=('Attrition: No', 'Attrition: Yes'))

# Top panel: rows with Attrition == 'No'.
fig1 = px.bar(Temp.loc[Temp.Attrition == 'No'], **bar_options)
for trace in fig1.data:
    fig.add_trace(trace, row=1, col=1)
fig.update_traces(marker_line_color=MF_LC, marker_line_width=1, opacity=1, row=1, col=1)

# Bottom panel: rows with Attrition == 'Yes'; hidden from the legend so each
# Gender entry is listed only once.
fig2 = px.bar(Temp.loc[Temp.Attrition == 'Yes'], **bar_options)
for trace in fig2.data:
    fig.add_trace(trace, row=2, col=1)
fig.update_traces(marker_line_color=MF_LC, marker_line_width=1, opacity=1,
                  showlegend=False, row=2, col=1)

# Layout and axes.
fig.update_layout(height=600, plot_bgcolor='white', legend_orientation='h')
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_traces(texttemplate='%{text:.2}%', textposition='outside')
fig.update_xaxes(title_text='Percent', range=[0, 20], row=2, col=1)
fig.update_yaxes(title_text='Work Life Balance', row=1, col=1)
fig.update_yaxes(title_text='Work Life Balance', row=2, col=1)
fig.update_layout(title={'text': 'Work Life Balance Distribution by Gender and Attrition',
                         'x': 0.50, 'y': 0.92,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()
# Histogram of 'Years At Company' colored by Attrition, with a box-plot
# marginal above it.
fig = px.histogram(Data, x='Years At Company', color='Attrition', marginal='box',
                   color_discrete_sequence=Att_Colors, hover_data=Data.columns)
fig.update_traces(marker_line_color=Att_LC, marker_line_width=0.5, opacity=1)
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
# The stale placeholder title ('Credit Distribution Histogram') that used to
# be set here was always overwritten by the title below, so it is dropped.
# yaxis_range only touches the histogram's own y axis, not the marginal's.
fig.update_layout(plot_bgcolor='white',
                  yaxis_range=[0, 250],
                  yaxis_title='Frequency',
                  title={'text': 'Years At Company Distribution by Attrition',
                         'x': 0.46, 'y': 0.95,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()
# Histogram of 'Years In Current Role', coloured by Attrition, with a marginal box plot.
fig = px.histogram(Data, x='Years In Current Role', color='Attrition', marginal='box',
                   color_discrete_sequence=Att_Colors, hover_data=Data.columns)
# Outline every trace (line colour Att_LC, width 0.5) and make it fully opaque.
fig.update_traces(marker_line_color=Att_LC, marker_line_width=0.5, opacity=1)
# Light-gray mirrored border on all axes; grid lines on the y axes only.
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
# Single layout pass: white background, title anchored by its top centre,
# and the label and fixed range of layout.yaxis.
fig.update_layout(plot_bgcolor='white',
                  title={'text': 'Years In Current Role Distribution by Attrition',
                         'x': 0.46, 'y': 0.95,
                         'xanchor': 'center', 'yanchor': 'top'},
                  yaxis_title='Frequency',
                  yaxis_range=[0, 400])
fig.show()
# Histogram of 'Years Since Last Promotion', coloured by Attrition, with a marginal box plot.
fig = px.histogram(Data, x='Years Since Last Promotion', color='Attrition', marginal='box',
                   color_discrete_sequence=Att_Colors, hover_data=Data.columns)
# Outline every trace (line colour Att_LC, width 0.5) and make it fully opaque.
fig.update_traces(marker_line_color=Att_LC, marker_line_width=0.5, opacity=1)
# Light-gray mirrored border on all axes; grid lines on the y axes only.
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
# Single layout pass: white background, title anchored by its top centre,
# and the label and fixed range of layout.yaxis.
fig.update_layout(plot_bgcolor='white',
                  title={'text': 'Years Since Last Promotion Distribution by Attrition',
                         'x': 0.46, 'y': 0.95,
                         'xanchor': 'center', 'yanchor': 'top'},
                  yaxis_title='Frequency',
                  yaxis_range=[0, 600])
fig.show()
# Histogram of 'Years With Current Manager', coloured by Attrition, with a marginal box plot.
fig = px.histogram(Data, x='Years With Current Manager', color='Attrition', marginal='box',
                   color_discrete_sequence=Att_Colors, hover_data=Data.columns)
# Outline every trace (line colour Att_LC, width 0.5) and make it fully opaque.
fig.update_traces(marker_line_color=Att_LC, marker_line_width=0.5, opacity=1)
# Light-gray mirrored border on all axes; grid lines on the y axes only.
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
# Single layout pass: white background, title anchored by its top centre,
# and the label and fixed range of layout.yaxis.
fig.update_layout(plot_bgcolor='white',
                  title={'text': 'Years With Current Manager Distribution by Attrition',
                         'x': 0.46, 'y': 0.95,
                         'xanchor': 'center', 'yanchor': 'top'},
                  yaxis_title='Frequency',
                  yaxis_range=[0, 350])
fig.show()